**
** This script merges the survival and readmission outcomes (assembled in makedata_) 
** to the hospital data we have in `runbuild_.do'
**
** Primary input: files in the format `cond'`sample'.dta (e.g. ami100.dta)
** 	- Info on survival and readm outcomes at the hospital-index year level
**
** 
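** Output: survival and readmission outcomes at the hospital-year level re-indexed to AHA IDs
** 	 - Per-condition files saved to `cond'`sample'_acq.dta (when REINDEX == 1)
** 	 - Pooled file saved to stacked_temp_20230606.dta (when MAKE_POOLED_DAT == 1)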
** 
**


** Session housekeeping: turn off output paging, close any log that is
** still open, and empty memory
set more off
capture log close
clear


** Directories
** location of the main analytic file
local fpath_data_main "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/build/output"

** location of the survival and readmission files
local fpath_survreadm "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/survreadm"


log using "prepare.log", replace

** Settings
** clinical conditions to process
global CONDITIONS "ami chf pnu stk hip hk"

** sample size tag that appears in the input file names
local sample 100

** first and last year index (yidx)
global startyidx -1
global endyidx 1

** first and last data year
global startyr 2003
global endyr 2014

** switches for the sections of code below (1 = run the section)
global REINDEX 0
global MAKE_POOLED_DAT 1




** merge the measures for each cohort into our hospital-year panel
** Runs only when the REINDEX switch is 1. For every condition in CONDITIONS
** the outcome file (one record per pn and yidx) is spread over calendar
** years, merged to the main analytic file on pn and year, collapsed to
** AHA id by year, and saved with the suffix _acq.
if $REINDEX ==1 {

	foreach cond in $CONDITIONS {

		di "Condition: `cond'"

		* read in the surv/readm measures
		use "`fpath_survreadm'/input/`cond'`sample'.dta", clear

		label var yidx "year index (1 = 2012-2014)"

		* populate year variable: make one copy of every record per data
		* year, then number the copies from startyr to endyr within pn-yidx
		* NOTE(review): assumes the input has one record per pn-yidx - confirm
		local n = $endyr - $startyr + 1
		expand `n'

		bys pn yidx: egen year = seq(), from($startyr) to($endyr) block(1)

		* keep only the calendar years that belong to each year index
		drop if yidx == -1 & !inrange(year, 2004, 2006)
		drop if yidx == 0 & !inrange(year, 2008, 2010)
		drop if yidx == 1 & !inrange(year, 2012, 2014)

		* make the outcome variables missing for all but the last year
		bys pn yidx: egen maxyr = max(year)

		unab vars : rnra* rars* rhdx* r_`cond'_mean snra* sars* shdx* s_`cond'_mean sr_`cond'_npats

		foreach v in `vars' {
			replace `v' = . if year != maxyr
		}

		drop maxyr

		* merge in with the main analytic file; keep(using matched) also
		* retains rows of the main file that have no outcome record
		merge m:1 pn year using "`fpath_data_main'/acq_cleaned_complete_20230606", keepusing(id hospbd beds_tot) keep(using matched)

		* collapse aha id by year: patient counts are summed unweighted,
		* all other variables are means weighted by hospbd
		collapse (rawsum) sr_`cond'_npats (mean) s_`cond'* shdx* sars* snra* r_`cond'* rhdx* rars* rnra* beds_tot [aw = hospbd], by(id year)

		* save
		save "`fpath_survreadm'/input/`cond'`sample'_acq.dta", replace

	}
}



* create data for pooled regression
* Runs only when the MAKE_POOLED_DAT switch is 1. Starts from the main
* analytic file, attaches the rhdx and shdx _fe measures of every condition
* by AHA id and year, and saves the result as stacked_temp_20230606.dta.
if $MAKE_POOLED_DAT == 1 {

	* bring in the main analytic file
	use "`fpath_data_main'/acq_cleaned_complete_20230606", clear

	* bring in the surv/readm outcomes re-based to AHA ID
	keep if year >= 2004
	foreach cond in $CONDITIONS {
		di "Condition: `cond'"
		* keep(match) drops unmatched rows, so after the loop only id-years
		* found in every condition file remain
		merge m:1 id year using "`fpath_survreadm'/input/`cond'`sample'_acq.dta", keep(match) nogen keepusing(rhdx_`cond'_fe shdx_`cond'_fe)
	}

	* keep relevant variables to melt and save
	keep id year ind* forprofit hrrcode hospbd *fe
	sort id year
	save "`fpath_survreadm'/input/stacked_temp_20230606.dta", replace

}

* close the log file (prepare.log) opened near the top of this script
log close 
















